HMR
hmrE <- 292674:292769
hmrI <- 294805:294864
chr3Files <- list.files("/Users/koenvandenberge/data/molly/Rescue",
pattern = "^chrIII", full.names = TRUE)
for(ff in 1:length(chr3Files)){
binaryCalls <- fread(file = chr3Files[ff])
colnames(binaryCalls) <- c("readID", "chr", "strand", "pos",
"mod_log_prob", "can_log_prob", "mod_base")
binaryCalls$methylation <- ifelse(binaryCalls$mod_log_prob > -0.2231436, 1, 0)
binaryCalls <- binaryCalls[,c("readID", "chr", "strand",
"pos", "methylation")]
## subset reads that completely overlap HMR
readCalls <- binaryCalls %>%
group_by(readID) %>%
summarize(start=min(pos),
end=max(pos),
avgMeth = mean(methylation),
length = n())
readsHMR <- unique(readCalls$readID[readCalls$start <= (min(hmrE) - 500) &
readCalls$end >= (max(hmrI) + 500)])
assign(paste0("binaryCalls",ff), binaryCalls[binaryCalls$readID %in% readsHMR,])
}
permute <- TRUE
# make probability matrix: relative to max avg methylation in each bin.
### HMR segments
hmrSegments <- readRDS("../data/HMR_bins.rds")
readAvMethListHMR <- list()
for(ff in 1:length(chr3Files)){ # loop timepoints
curData <- get(paste0("binaryCalls",ff))
curReads <- unique(curData$readID)
for(rr in 1:length(curReads)){ # loop reads
curRead <- curReads[rr]
curReadData <- curData[curData$readID == curRead,]
avMethLinkers <- c()
for(bb in 1:nrow(hmrSegments)){ # loop bins
curStart <- hmrSegments$start[bb]
curEnd <- hmrSegments$end[bb]
curId <- curReadData$pos >= curStart & curReadData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curReadData[curReadData$pos >= curStart & curReadData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListHMR[[rr]] <- avMethLinkers
}
avMethMatHMR <- do.call(rbind, readAvMethListHMR)
if(permute){
for(rr in 1:nrow(avMethMatHMR)) avMethMatHMR[rr,] <- avMethMatHMR[rr,sample(ncol(avMethMatHMR))]
}
# normalize by maximum average methylation in each bin
avMethMatHMRNormalizedPerBin <- sweep(avMethMatHMR, 2, STATS=apply(avMethMatHMR, 2, max, na.rm=TRUE), FUN="/")
avMethMatHMRNormalizedAll <- avMethMatHMR / max(avMethMatHMR, na.rm=TRUE)
assign(paste0("avMethMatHMRNormalizedAll", ff), avMethMatHMRNormalizedAll)
assign(paste0("avMethMatHMRNormalizedPerBin", ff), avMethMatHMRNormalizedPerBin)
}
Normalize across all
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMRNormalizedAll", ff))
conditionalProbability <- matrix(NA, nrow=nrow(hmrSegments), ncol=nrow(hmrSegments))
for(bb in 1:nrow(hmrSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(hmrSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(hmrSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HMR: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






Normalize per bin
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMRNormalizedPerBin", ff))
conditionalProbability <- matrix(NA, nrow=nrow(hmrSegments), ncol=nrow(hmrSegments))
for(bb in 1:nrow(hmrSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(hmrSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(hmrSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HMR: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






Second permutation
permute <- TRUE
# make probability matrix: relative to max avg methylation in each bin.
### HMR segments
hmrSegments <- readRDS("../data/HMR_bins.rds")
readAvMethListHMR <- list()
for(ff in 1:length(chr3Files)){ # loop timepoints
curData <- get(paste0("binaryCalls",ff))
curReads <- unique(curData$readID)
for(rr in 1:length(curReads)){ # loop reads
curRead <- curReads[rr]
curReadData <- curData[curData$readID == curRead,]
avMethLinkers <- c()
for(bb in 1:nrow(hmrSegments)){ # loop bins
curStart <- hmrSegments$start[bb]
curEnd <- hmrSegments$end[bb]
curId <- curReadData$pos >= curStart & curReadData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curReadData[curReadData$pos >= curStart & curReadData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListHMR[[rr]] <- avMethLinkers
}
avMethMatHMR <- do.call(rbind, readAvMethListHMR)
if(permute){
for(rr in 1:nrow(avMethMatHMR)) avMethMatHMR[rr,] <- avMethMatHMR[rr,sample(ncol(avMethMatHMR))]
}
# normalize by maximum average methylation in each bin
avMethMatHMRNormalizedPerBin <- sweep(avMethMatHMR, 2, STATS=apply(avMethMatHMR, 2, max, na.rm=TRUE), FUN="/")
avMethMatHMRNormalizedAll <- avMethMatHMR / max(avMethMatHMR, na.rm=TRUE)
assign(paste0("avMethMatHMRNormalizedAll", ff), avMethMatHMRNormalizedAll)
assign(paste0("avMethMatHMRNormalizedPerBin", ff), avMethMatHMRNormalizedPerBin)
}
Normalize across all
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMRNormalizedAll", ff))
conditionalProbability <- matrix(NA, nrow=nrow(hmrSegments), ncol=nrow(hmrSegments))
for(bb in 1:nrow(hmrSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(hmrSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(hmrSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HMR: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






Normalize per bin
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMRNormalizedPerBin", ff))
conditionalProbability <- matrix(NA, nrow=nrow(hmrSegments), ncol=nrow(hmrSegments))
for(bb in 1:nrow(hmrSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(hmrSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(hmrSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HMR: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






HML
hmlE <- 11237:11268
hmlI <- 14600:14711
chr3Files <- list.files("/Users/koenvandenberge/data/molly/Rescue",
pattern = "^chrIII", full.names = TRUE)
for(ff in 1:length(chr3Files)){
binaryCalls <- fread(file = chr3Files[ff])
colnames(binaryCalls) <- c("readID", "chr", "strand", "pos",
"mod_log_prob", "can_log_prob", "mod_base")
binaryCalls$methylation <- ifelse(binaryCalls$mod_log_prob > -0.2231436, 1, 0)
binaryCalls <- binaryCalls[,c("readID", "chr", "strand",
"pos", "methylation")]
## subset reads that completely overlap HMR
readCalls <- binaryCalls %>%
group_by(readID) %>%
summarize(start=min(pos),
end=max(pos),
avgMeth = mean(methylation),
length = n())
readsHMR <- unique(readCalls$readID[readCalls$start <= (min(hmlE) - 500) &
readCalls$end >= (max(hmlI) + 500)])
assign(paste0("binaryCalls",ff), binaryCalls[binaryCalls$readID %in% readsHMR,])
}
First permutation
permute <- TRUE
# make probability matrix: relative to max avg methylation in each bin.
### HML segments
HMLSegments <- readRDS("../data/HML_bins.rds")
readAvMethListHML <- list()
for(ff in 1:length(chr3Files)){ # loop timepoints
curData <- get(paste0("binaryCalls",ff))
curReads <- unique(curData$readID)
for(rr in 1:length(curReads)){ # loop reads
curRead <- curReads[rr]
curReadData <- curData[curData$readID == curRead,]
avMethLinkers <- c()
for(bb in 1:nrow(HMLSegments)){ # loop bins
curStart <- HMLSegments$start[bb]
curEnd <- HMLSegments$end[bb]
curId <- curReadData$pos >= curStart & curReadData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curReadData[curReadData$pos >= curStart & curReadData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
# names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
names(avMethLinkers)[bb] <- bb
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListHML[[rr]] <- avMethLinkers
}
avMethMatHML <- do.call(rbind, readAvMethListHML)
if(permute){
for(rr in 1:nrow(avMethMatHML)) avMethMatHML[rr,] <- avMethMatHML[rr,sample(ncol(avMethMatHML))]
}
# normalize by maximum average methylation in each bin
avMethMatHMLNormalizedPerBin <- sweep(avMethMatHML, 2, STATS=apply(avMethMatHML, 2, max, na.rm=TRUE), FUN="/")
avMethMatHMLNormalizedAll <- avMethMatHML / max(avMethMatHML, na.rm=TRUE)
assign(paste0("avMethMatHML", ff), avMethMatHML)
assign(paste0("avMethMatHMLNormalizedAll", ff), avMethMatHMLNormalizedAll)
assign(paste0("avMethMatHMLNormalizedPerBin", ff), avMethMatHMLNormalizedPerBin)
}
Normalize across all
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMLNormalizedAll", ff))
conditionalProbability <- matrix(NA, nrow=nrow(HMLSegments), ncol=nrow(HMLSegments))
for(bb in 1:nrow(HMLSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
#avConditionalProbability <- colMeans(conditionalProbability)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(HMLSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(HMLSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HML: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






# should this be correlated?
plot(x=colMeans(avMethMatHMLNormalizedAll1), y=diag(conditionalProbabilityTable1))

Normalize per bin
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMLNormalizedPerBin", ff))
conditionalProbability <- matrix(NA, nrow=nrow(HMLSegments), ncol=nrow(HMLSegments))
for(bb in 1:nrow(HMLSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
#avConditionalProbability <- colMeans(conditionalProbability)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(HMLSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(HMLSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HML: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






# should this be correlated?
plot(x=colMeans(avMethMatHMLNormalizedPerBin1), y=diag(conditionalProbabilityTable1))

Second permutation
permute <- TRUE
# make probability matrix: relative to max avg methylation in each bin.
### HML segments
HMLSegments <- readRDS("../data/HML_bins.rds")
readAvMethListHML <- list()
for(ff in 1:length(chr3Files)){ # loop timepoints
curData <- get(paste0("binaryCalls",ff))
curReads <- unique(curData$readID)
for(rr in 1:length(curReads)){ # loop reads
curRead <- curReads[rr]
curReadData <- curData[curData$readID == curRead,]
avMethLinkers <- c()
for(bb in 1:nrow(HMLSegments)){ # loop bins
curStart <- HMLSegments$start[bb]
curEnd <- HMLSegments$end[bb]
curId <- curReadData$pos >= curStart & curReadData$pos <= curEnd
if(sum(curId) > 0){
curBinData <- curReadData[curReadData$pos >= curStart & curReadData$pos <= curEnd,]
avMethLinkers[bb] <- mean(curBinData$methylation)
# names(avMethLinkers)[bb] <- nrow(curBinData) #coverage
names(avMethLinkers)[bb] <- bb
} else {
avMethLinkers[bb] <- NA
}
}
readAvMethListHML[[rr]] <- avMethLinkers
}
avMethMatHML <- do.call(rbind, readAvMethListHML)
if(permute){
for(rr in 1:nrow(avMethMatHML)) avMethMatHML[rr,] <- avMethMatHML[rr,sample(ncol(avMethMatHML))]
}
# normalize by maximum average methylation in each bin
avMethMatHMLNormalizedPerBin <- sweep(avMethMatHML, 2, STATS=apply(avMethMatHML, 2, max, na.rm=TRUE), FUN="/")
avMethMatHMLNormalizedAll <- avMethMatHML / max(avMethMatHML, na.rm=TRUE)
assign(paste0("avMethMatHML", ff), avMethMatHML)
assign(paste0("avMethMatHMLNormalizedAll", ff), avMethMatHMLNormalizedAll)
assign(paste0("avMethMatHMLNormalizedPerBin", ff), avMethMatHMLNormalizedPerBin)
}
Normalize across all
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMLNormalizedAll", ff))
conditionalProbability <- matrix(NA, nrow=nrow(HMLSegments), ncol=nrow(HMLSegments))
for(bb in 1:nrow(HMLSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
#avConditionalProbability <- colMeans(conditionalProbability)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(HMLSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(HMLSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HML: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






# should this be correlated?
plot(x=colMeans(avMethMatHMLNormalizedAll1), y=diag(conditionalProbabilityTable1))

Normalize per bin
for(ff in 1:length(chr3Files)){
#if(ff == 4) next
curMeth <- get(paste0("avMethMatHMLNormalizedPerBin", ff))
conditionalProbability <- matrix(NA, nrow=nrow(HMLSegments), ncol=nrow(HMLSegments))
for(bb in 1:nrow(HMLSegments)){
jointProbability <- colMeans(curMeth[,bb] * curMeth, na.rm=TRUE) # P(bin A, bin B)
# P(bin A | bin B) = P(bin A, bin B) / P(bin B)
conditionalProbability[,bb] <- jointProbability / colMeans(curMeth, na.rm=TRUE)
#avConditionalProbability <- colMeans(conditionalProbability)
}
# jointProbability <- crossprod(curMeth) / nrow(curMeth)
# # P(A)
# marginalProbability <- colMeans(curMeth)
# # P(A=1 | B=1) = P(A=1, B=1) / P(B=1)
# # P(B=1 | A=1) = P(A=1, B=1) / P(A=1)
# conditionalProbability <- jointProbability / marginalProbability
library(pheatmap)
library(RColorBrewer)
colpal <- colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)
breaks <- c(seq(0.05, 0.7, length=100), .8)
rownames(conditionalProbability) <- paste0("P(. | bin", 1:nrow(HMLSegments),")")
colnames(conditionalProbability) <- paste0("P(bin", 1:nrow(HMLSegments)," | .)")
pheatmap(conditionalProbability,
cluster_rows = FALSE,
cluster_cols=FALSE,
main=paste0("HML: ", time[ff]),
col = colpal, breaks = breaks)
assign(paste0("conditionalProbabilityTable", ff), conditionalProbability)
}






# should this be correlated?
plot(x=colMeans(avMethMatHMLNormalizedPerBin1), y=diag(conditionalProbabilityTable1))
